* Session setup: empty the data in memory, close a log if one is open
* (-capture- swallows the error when none is), and disable -more- pauses.
clear
capture log close 
set more off 

/* Description:

  This file produces the US tables and figures for:
	*Appendix Table A1: Statistical significance of changes in IRP for US
	*Appendix Figure A1: mother-child associations, conditional on fathers
	*Appendix Figure A3: sensitivity to zeros
	*Appendix Figure A4: combined SRC and SEO
	*Appendix Figure A8: cohort groups
	*Appendix Figure A9(b): IGE trends
*/


********************************************************
*Table A1: Statistical significance of changes in IRP for US
********************************************************

***IGE estimates 
* Estimates period-specific slopes of child income ($inc) on parental income
* ($incp), tests period 3 against period 1, and saves one row per period to
* ${us_results}/tabA1_us_ige.dta.

use *newid *LABc* *cohort *LABYR* *MF* year female if year>=1985 using ${projdata}/analysis-sample-main.dta, clear

* Three calendar periods; years outside 1985-2019 keep period==. 
gen period=.
replace period=1 if inrange(year,1985,1995)
replace period=2 if inrange(year,1996,2007)
replace period=3 if inrange(year,2008,2019)
/* Rename US vars to match Swedish varnames */
rename female woman
rename newid idnr 
rename cohort yob 
rename LOGLABc learn_c 
rename m_cohort myob
rename f_cohort fyob
rename m_LABYRc mobsy_c 
rename f_LABYRc fobsy_c 
rename m_LABc mearn_c 
rename f_LABc fearn_c
rename f_LOGLABc lfearn_c 
rename m_LOGLABc lmearn_c
rename p_LOGMFAVGc lfmearn_c 
/* End US variable renames */

* Age controls: child age centred at 40, each parent's age in the year
* recorded in fobsy_c/mobsy_c, their row mean, and the age of the parent with
* the higher income (father unless only the mother is observed or she earns more).
g agec1=year-yob-40
g agef1=fobsy_c-fyob
g agem1=mobsy_c-myob
egen fmage1=rowmean(agef1 agem1)
g maxage1=agef1
replace maxage1=agem1 if missing(fearn_c) & !missing(mearn_c)
replace maxage1=agem1 if mearn_c>fearn_c & !missing(mearn_c) & !missing(fearn_c)
* Powers 2-4, so each age enters the regression as a quartic
forval i=2/4{
	g agec`i'=agec1^`i'
	g agef`i'=agef1^`i'
	g agem`i'=agem1^`i'
	g fmage`i'=fmage1^`i'
	g maxage`i'=maxage1^`i'
}

*Figure 1: sons and daughters pooled on combined (mean) parental income
global fig "1"
global inc "learn_c"
global incp "lfmearn_c"
global agep "fmage"
global beta b$fig /*matrix to store estimates*/
global se se$fig /*matrix to store standard errors*/
reg $inc i.period#c.$incp i.year $agep* agec* c.agec*#c.$incp, cluster(idnr)
levelsof period, local(periods)
* The period-by-income slopes are the first terms of the model, so the first
* r(r) rows of e(b)' (r(r) = number of period levels from -levelsof-) hold them.
mat $beta=e(b)'
mat $beta=$beta[1..r(r),....]
* Stack the standard errors under a dummy first row, then strip that row.
mat $se=(0)
foreach i of local periods {
	sca t1=_se[`i'.period#c.$incp]
	mat $se=$se\t1
}
mat $se=$se[2...,....]
lincom _b[3.period#c.$incp] - _b[1.period#c.$incp]  /* test stat sig of diff btwn period 3 and period 1 estimates */
sca diffest$fig=r(estimate)
sca diffstderr$fig=r(se)
sca diffpvalue$fig=r(p)

*Get number and list of periods 
levelsof period, local(periodlist)
gl nperiods: word count `periodlist'
di "Number of periods=$nperiods"

* Replace the micro data with a results dataset: one row per period
drop _all
set obs $nperiods
gen period=.
forvalues i=1/$nperiods {
	replace period=`:word `i' of `periodlist'' if _n==`i'
}
local est "1 "
foreach e of local est{
	* svmat appends the column number, creating b`e'1 and se`e'1
	svmat b`e'
	svmat se`e'

	* 95% confidence bounds
	g sd`e'u=b`e'1+1.96*se`e'1
	g sd`e'l=b`e'1-1.96*se`e'1
	
	* Period 3 minus period 1 test results (same value on every row)
	g diff`e'=diffest`e'
	g diffse`e'=diffstderr`e'
	g diffpval`e'=diffpvalue`e'
}
rename b11 ige1 
lab var ige1 "pooled/parental average"
lab var diff1 "Period 3-1 diff: pooled/parental average"
lab var sd1u "pooled/parental average"
lab var sd1l "pooled/parental average"
compress
save ${us_results}/tabA1_us_ige.dta, replace

***IRP estimates
* Same design as the IGE block, but on the p* income measures and for nine
* child/parent combinations. Results go to ${us_results}/tabA1_us_irp.dta.
use *newid *LAB* *cohort *LABYR* *MF* year female if year>=1985 using ${projdata}/analysis-sample-main.dta, clear

* Calendar periods (missing outside 1985-2019)
generate period = .
replace period = 1 if inrange(year,1985,1995)
replace period = 2 if inrange(year,1996,2007)
replace period = 3 if inrange(year,2008,2019)

/* Rename US vars to match Swedish varnames */
rename (newid female cohort pLAB) (idnr woman yob pearn)
rename (m_cohort f_cohort m_LABYR f_LABYR) (myob fyob mobsy fobsy)
rename (m_LAB f_LAB) (mearn fearn)
rename (pm_LAB pf_LAB pp_MFAVG) (pmearn pfearn pfmearn)
/* End US variable renames */

* Age terms: child age centred at 40, parental ages, their mean, and the age
* of the higher-income parent; powers 2-4 are added so each enters as a quartic
generate agec1 = year-yob-40
generate agef1 = fobsy-fyob
generate agem1 = mobsy-myob
egen fmage1 = rowmean(agef1 agem1)
generate maxage1 = agef1
replace maxage1 = agem1 if missing(fearn) & !missing(mearn)
replace maxage1 = agem1 if mearn>fearn & !missing(mearn) & !missing(fearn)
forvalues pw = 2/4 {
	foreach stub in agec agef agem fmage maxage {
		generate `stub'`pw' = `stub'1^`pw'
	}
}

* Specification table (parallel word lists, one column per model):
*   1      pooled children on combined (mean) parental income
*   2s/2d  sons / daughters on father's income
*   3s/3d  sons / daughters on mother's income
*   4s/4d  sons / daughters on combined (mean) parental income
*   5f/5m  pooled children on father's / mother's income
local figlist  "1 2s 2d 3s 3d 4s 4d 5f 5m"
local incplist "pfmearn pfearn pfearn pmearn pmearn pfmearn pfmearn pfearn pmearn"
local ageplist "fmage agef agef agem agem fmage fmage agef agem"
local sexlist  "all 0 1 0 1 0 1 all all"

local nspec : word count `figlist'
forvalues s = 1/`nspec' {
	global fig  : word `s' of `figlist'
	global inc  "pearn"
	global incp : word `s' of `incplist'
	global agep : word `s' of `ageplist'
	global beta b$fig /*matrix to store estimates*/
	global se se$fig /*matrix to store standard errors*/
	global var var$fig /*not used in this section; ends as var5m as before*/

	* Sample restriction: woman==0 (sons), woman==1 (daughters) or none (pooled)
	local sex : word `s' of `sexlist'
	local subsample ""
	if "`sex'" != "all" {
		local subsample "if woman==`sex'"
	}

	reg $inc i.period#c.$incp i.year $agep* agec* c.agec*#c.$incp `subsample', cluster(idnr)
	levelsof period, local(periods)
	* First r(r) rows of e(b)' are the period-specific slopes
	mat $beta=e(b)'
	mat $beta=$beta[1..r(r),....]
	* Collect the matching standard errors (dummy first row removed afterwards)
	mat $se=(0)
	foreach p of local periods {
		sca t1=_se[`p'.period#c.$incp]
		mat $se=$se\t1
	}
	mat $se=$se[2...,....]
	lincom _b[3.period#c.$incp] - _b[1.period#c.$incp]  /* test stat sig of diff btwn period 3 and period 1 estimates */
	sca diffest$fig=r(estimate)
	sca diffstderr$fig=r(se)
	sca diffpvalue$fig=r(p)
}

*Get number and list of periods 
levelsof period, local(periodlist)
gl nperiods: word count `periodlist'
di "Number of periods=$nperiods"

* Results dataset: one row per period
drop _all
set obs $nperiods
gen period=.
local row = 0
foreach p of local periodlist {
	local ++row
	replace period = `p' in `row'
}

* Text used for the variable labels of each model
local lab1  "pooled/parental average"
local lab2s "son/father"
local lab2d "daughter/father"
local lab3s "son/mother"
local lab3d "daughter/mother"
local lab4s "son/parental average"
local lab4d "daughter/parental average"
local lab5f "pooled/father"
local lab5m "pooled/mother"

foreach m of local figlist {
	* Estimates and SEs (svmat names the columns b`m'1 / se`m'1)
	svmat b`m'
	svmat se`m'

	* 95% confidence bounds
	generate sd`m'u = b`m'1+1.96*se`m'1
	generate sd`m'l = b`m'1-1.96*se`m'1

	* Period 3 minus period 1: estimate, SE and p-value
	generate diff`m' = diffest`m'
	generate diffse`m' = diffstderr`m'
	generate diffpval`m' = diffpvalue`m'

	rename b`m'1 irp`m'

	label variable irp`m'  "`lab`m''"
	label variable diff`m' "Period 3-1 diff: `lab`m''"
	label variable sd`m'u  "`lab`m''"
	label variable sd`m'l  "`lab`m''"
}
compress
save ${us_results}/tabA1_us_irp.dta, replace

*** Now create table 
* Stacks the IGE row (model "0") and the nine IRP models into a long table
* with one line per model x period (periods 1 and 3 only) and exports it.
* Requires the user-written commands -renvars- and -texsave-.
*IGE 
use ${us_results}/tabA1_us_ige.dta, clear
keep period ige1 se11 diff1 diffse1 diffpval1
drop if period==2
* The difference test is shown once per model, on the period-1 line
foreach var in diff1 diffse1 diffpval1 {
	replace `var'=. if period==3
}
rename  (ige1 se11 diff1 diffse1 diffpval1) (est0 se0 diff0 diffse0 diffpval0)
tempfile ige
save `ige'  
*IRP 
use ${us_results}/tabA1_us_irp.dta, clear
drop sd*
drop if period==2
* Strip the column index that svmat appended to the SE names (se2s1 -> se2s)
renvars se11 se2d1 se2s1 se3d1 se3s1 se4d1 se4s1 se5f1 se5m1, postsub(1 ) 
renvars irp*, presub(irp est)
merge 1:1 period using `ige' , nogen
* With a string j, every variable beginning with a stub is a candidate for
* that stub, so -diff- could also pick up diffse*/diffpval* as extra models.
* Give those two groups non-overlapping names for the reshape and restore
* the original names afterwards.
rename diffse* dse*
rename diffpval* dpval*
reshape long est se diff dse dpval , i(period) j(model) string
rename (dse dpval) (diffse diffpval)
sort model period
list 
foreach var in diff diffse diffpval {
	replace `var'=. if period==3
}
* Rounding for display: 3 decimals, 4 for the p-value
foreach var in est se diff diffse  {
	replace `var'=round(`var',.001)
}
foreach var in diffpval {
	replace `var'=round(`var',.0001)
}
*Use figure numbers rather than model numbers, so order aligns with paper 
gen fig=""
replace fig="1" if model=="0"
replace fig="2" if model=="1"
replace fig="3s" if model=="4s"
replace fig="3d" if model=="4d"
replace fig="4f" if model=="5f"
replace fig="4m" if model=="5m"
replace fig="5fs" if model=="2s"
replace fig="5fd" if model=="2d"
replace fig="5ms" if model=="3s"
replace fig="5md" if model=="3d"
gen descrip=""
replace descrip="IGE (parent-child)" if model=="0"
replace descrip="IRP (parent-child)" if model=="1"
replace descrip="IRP (parent-son)" if model=="4s"
replace descrip="IRP (parent-daughter)" if model=="4d"
replace descrip="IRP (father-child)" if model=="5f"
replace descrip="IRP (mother-child)" if model=="5m"
replace descrip="IRP (father-son)" if model=="2s"
replace descrip="IRP (father-daughter)" if model=="2d"
replace descrip="IRP (mother-son)" if model=="3s"
replace descrip="IRP (mother-daughter)" if model=="3d"
* Description printed only on the first (period-1) line of each model
replace descrip="" if period==3

order model fig descrip, first
sort fig period 

label define periodvals 1 "1985-95" 2 "1996-2007" 3 "2008-19"
label values period periodvals 

* String version of the period for the exported table
gen period_str=""
replace period_str="1985-95" if period==1
replace period_str="2008-19" if period==3

label var descrip "Persistence measure"
label var period_str "Time period" 
label var est "Estimate"
label var se "Std. Err."
label var diff "Difference (late-early)"
label var diffse "se(diff)"
label var diffpval "p-value (diff)"

*TABLE A1: Statistical significance of changes in IRP for US
texsave descrip period_str est se diff diffse diffpval using ${tabfig}/tableA1.tex, align(l r r r r r r r r r) replace varlabels nofix title("Statistical significance of changes in IRP for US") decimalalign





********************************************************
*Figure A1: mother-child associations, conditional on fathers
********************************************************

* Plot window and child age range for Figure A1
global yrmin=1985
global yrmax=2018	
// 2018 for PSID
 loc yrmin= ${yrmin} 	
 loc yrmax= ${yrmax} 
 loc agemin=25 
 loc agemax=48 

use *newid *LAB* AGE *cohort *LABYR* *MF* *empl* *schmax year female if year>=1985 using ${projdata}/analysis-sample-main.dta, clear

* Child age centred at 40 and each parent's age in the year recorded in
* f_LABYR/m_LABYR, with powers 2-4 so each enters as a quartic
gen AGEC1=year-cohort-40 
gen f_LABAGE_1=f_LABYR-f_cohort 
gen m_LABAGE_1=m_LABYR-m_cohort 
forval i=2/4{
	gen AGEC`i'=AGEC1^`i'
	gen f_LABAGE_`i'=f_LABAGE_1^`i'
	gen m_LABAGE_`i'=m_LABAGE_1^`i'
}
*Use sample with mothers AND fathers
 keep if pm_LAB!=. & m_LABAGE_1!=. & pf_LAB!=. & f_LABAGE_1!=.
 keep if m_emplavg!=. & f_emplavg!=.
 keep if m_schmax!=. & f_schmax!=.
*Residualize all measures using mother's AND father's age (quartic)
* (run on the sample before the year/age restriction below; the *_r
*  variables are not referenced again in the code visible in this section)
 foreach var in pLAB employ schmax pm_LAB m_emplavg m_schmax pf_LAB f_emplavg f_schmax  {
	qui regress `var' m_LABAGE_? f_LABAGE_?   AGEC? i.year
	predict `var'_r, resid
 }
 rename AGE AGEC 
*subsample
 keep if inrange(year,`yrmin',`yrmax') & inrange(AGEC,`agemin',`agemax')
 tab year, su(AGEC)
*need yearlist for loops due to biennial surveys
* Collected after the year/age restriction so that it only contains years
* still present in the data the regressions below are estimated on.
 levelsof year, local(yearlist) 
 
rename AGEC AGE_C // so AGEC* only picks up quartic AGEC1-AGEC4
 
* Plotting scaffold: the first rows carry the calendar years yrmin..yrmax in
* yr_gr, and the year-specific estimates are written onto those rows.
gen yr_gr=_n
replace yr_gr = yr_gr+${yrmin}-1
replace yr_gr=. if yr_gr>${yrmax}

*Figure A1B: (a=Sweden,b=US) Mother's IRP Conditional on father's income
* Unconditional: year-specific slope of pLAB on pm_LAB
gen b_irp=.
gen se_irp=.
reg pLAB i.year#c.pm_LAB c.AGEC*#c.pm_LAB i.year m_LABAGE* AGEC* , cluster(newid) 
 replace b_irp=_b[1985b.year#c.pm_LAB] if yr_gr==1985
 replace se_irp=_se[1985b.year#c.pm_LAB] if yr_gr==1985
foreach y of local yearlist {
 replace b_irp=_b[`y'.year#c.pm_LAB] if yr_gr==`y'
 replace se_irp=_se[`y'.year#c.pm_LAB] if yr_gr==`y'
}
* Conditional: same slope with the father's year-specific and age-interacted
* pf_LAB terms added
gen b_irp_cond=.
gen se_irp_cond=.
reg pLAB i.year#c.pm_LAB c.AGEC*#c.pm_LAB i.year m_LABAGE* AGEC*  i.year#c.pf_LAB c.AGEC*#c.pf_LAB, cluster(newid) 
 replace b_irp_cond=_b[1985b.year#c.pm_LAB] if yr_gr==1985
 replace se_irp_cond=_se[1985b.year#c.pm_LAB] if yr_gr==1985
foreach y of local yearlist {
 replace b_irp_cond=_b[`y'.year#c.pm_LAB] if yr_gr==`y'
 replace se_irp_cond=_se[`y'.year#c.pm_LAB] if yr_gr==`y'
}
label variable yr_gr "year"
* 95% confidence bounds
	gen ci_low 	= b_irp - 1.96*se_irp
    gen ci_high = b_irp + 1.96*se_irp
	gen ci_low_cond 	= b_irp_cond - 1.96*se_irp_cond
    gen ci_high_cond = b_irp_cond + 1.96*se_irp_cond	
*Figure	
twoway (connect b_irp yr_gr, ms(o) lc(ebblue) mc(ebblue)) (rspike ci_low ci_high  yr_gr, lc(ebblue)) /*
*/(connect b_irp_cond yr_gr, ms(t) lc(cranberry) mc(cranberry) lp(solid)) (rspike ci_low_cond ci_high_cond yr_gr, lc(cranberry)), /*
*/ ytitle("Rank persistence") xtitle("Year") graphr(fc(white) c(white)) /*
*/legend(order(1 3) label(1 "Uncond.") label(3 "Cond. on father's income rank")) /*
*/ xsc(r(1985 2018)) xlab(1985(5)2015) 
graph export "${tabfig}/figA1b.pdf", replace
graph export "${tabfig}/figA1b.png", replace


*Figure A1d (c=Sweden,d=US) Income-schooling regressions
* Same layout as the previous panel, with the mother's schooling (m_schmax)
* as the regressor and the father's schooling (f_schmax) as the conditioning
* variable. Relies on yr_gr and the local yearlist created above.

* Remove the estimate/SE/CI variables left over from the previous panel
foreach stub in b_ se_ ci_ {
	capture drop `stub'*
}

foreach spec in uncond cond {
	* Variable suffix and extra (father) regressors for this specification
	local sfx ""
	local fathers ""
	if "`spec'" == "cond" {
		local sfx "_cond"
		local fathers "i.year#c.f_schmax c.AGEC*#c.f_schmax"
	}

	generate b_irp`sfx' = .
	generate se_irp`sfx' = .
	regress pLAB i.year#c.m_schmax c.AGEC*#c.m_schmax i.year m_LABAGE* AGEC* `fathers', cluster(newid)

	* Write each year's slope and SE onto the row whose yr_gr is that year
	replace b_irp`sfx' = _b[1985b.year#c.m_schmax] if yr_gr==1985
	replace se_irp`sfx' = _se[1985b.year#c.m_schmax] if yr_gr==1985
	foreach yr of local yearlist {
		replace b_irp`sfx' = _b[`yr'.year#c.m_schmax] if yr_gr==`yr'
		replace se_irp`sfx' = _se[`yr'.year#c.m_schmax] if yr_gr==`yr'
	}
}

label variable yr_gr "year"

* 95% confidence bounds
generate ci_low = b_irp - 1.96*se_irp
generate ci_high = b_irp + 1.96*se_irp
generate ci_low_cond = b_irp_cond - 1.96*se_irp_cond
generate ci_high_cond = b_irp_cond + 1.96*se_irp_cond

*Figure
twoway (connect b_irp yr_gr, ms(o) lc(ebblue) mc(ebblue)) ///
	(rspike ci_low ci_high  yr_gr, lc(ebblue)) ///
	(connect b_irp_cond yr_gr, ms(t) lc(cranberry) mc(cranberry) lp(solid)) ///
	(rspike ci_low_cond ci_high_cond yr_gr, lc(cranberry)), ///
	ytitle("Persistence") xtitle("Year") graphr(fc(white) c(white)) ///
	legend(order(1 3) label(1 "Uncond.") label(3 "Cond. on father's schooling")) ///
	xsc(r(1985 2018)) xlab(1985(5)2015)
graph export "${tabfig}/figA1d.pdf", replace





********************************************************
*Figure A3: sensitivity to zeros
********************************************************

*Save id-numbers from IGE samples for daughter-parent and mother-child regressions
* For each of four IGE specifications, run the regression and store the
* distinct idnr values of its estimation sample in a tempfile
* (`samp4s', `samp4d', `samp5f', `samp5m').
use *newid *LABc* *cohort *LABYR* *MF* year female if year>=1985 using ${projdata}/analysis-sample-main.dta, clear

/* Rename US vars to match Swedish varnames */
rename female woman
rename newid idnr 
rename cohort yob 
rename LOGLABc learn_c 

rename m_cohort myob
rename f_cohort fyob
rename m_LABYRc mobsy_c 
rename f_LABYRc fobsy_c 

rename m_LABc mearn_c 
rename f_LABc fearn_c

rename f_LOGLABc lfearn_c 
rename m_LOGLABc lmearn_c
rename p_LOGMFAVGc lfmearn_c 
/* End US variable renames */

* Age terms (child age centred at 40; quartic via powers 2-4)
g agec1=year-yob-40
g agef1=fobsy_c-fyob
g agem1=mobsy_c-myob
egen fmage1=rowmean(agef1 agem1)
g maxage1=agef1
* Full variable names (fearn_c/mearn_c) rather than the abbreviations
* fearn/mearn, so the code also runs with -set varabbrev off-
replace maxage1=agem1 if missing(fearn_c) & !missing(mearn_c)
replace maxage1=agem1 if mearn_c>fearn_c & !missing(mearn_c) & !missing(fearn_c)
forval i=2/4{
	g agec`i'=agec1^`i'
	g agef`i'=agef1^`i'
	g agem`i'=agem1^`i'
	g fmage`i'=fmage1^`i'
	g maxage`i'=maxage1^`i'
}

* Specifications (parallel word lists):
*   4s  sons on combined (mean) parental income
*   4d  daughters on combined (mean) parental income
*   5f  sons and daughters pooled on father's income
*   5m  sons and daughters pooled on mother's income
local figlist  "4s 4d 5f 5m"
local incplist "lfmearn_c lfmearn_c lfearn_c lmearn_c"
local ageplist "fmage fmage agef agem"
local sexlist  "0 1 all all"

forvalues s = 1/4 {
	local f : word `s' of `figlist'
	global inc  "learn_c"
	global incp : word `s' of `incplist'
	global agep : word `s' of `ageplist'

	* woman==0 (sons), woman==1 (daughters) or no restriction (pooled)
	local sex : word `s' of `sexlist'
	local subsample ""
	if "`sex'" != "all" {
		local subsample "if woman==`sex'"
	}

	reg $inc i.year#c.$incp i.year $agep* agec* c.agec*#c.$incp `subsample', cluster(idnr)

	* Keep one row per person in the estimation sample and store the ids;
	* -restore- brings the full data back for the next specification
	preserve
	keep if e(sample)==1
	keep idnr
	duplicates drop idnr, force
	compress
	sort idnr
	tempfile samp`f'
	save `samp`f''
	restore
}

*IRP estimates for 4d and 5m samples using IGE samples
* Re-estimates the year-specific rank slopes for models 4s, 4d, 5f and 5m on
* the persons stored in the tempfiles `samp4s' ... `samp5m'.
use *newid *LAB* *cohort *LABYR* *MF* year female if year>=1985 using ${projdata}/analysis-sample-main.dta, clear

/* Rename US vars to match Swedish varnames */
rename (newid female cohort pLAB) (idnr woman yob pearn)
rename (m_cohort f_cohort m_LABYR f_LABYR) (myob fyob mobsy fobsy)
rename (m_LAB f_LAB) (mearn fearn)
rename (pm_LAB pf_LAB pp_MFAVG) (pmearn pfearn pfmearn)
/* End US variable renames */

* Age terms (child age centred at 40; quartic via powers 2-4)
generate agec1 = year-yob-40
generate agef1 = fobsy-fyob
generate agem1 = mobsy-myob
egen fmage1 = rowmean(agef1 agem1)
generate maxage1 = agef1
replace maxage1 = agem1 if missing(fearn) & !missing(mearn)
replace maxage1 = agem1 if mearn>fearn & !missing(mearn) & !missing(fearn)
forvalues pw = 2/4 {
	foreach stub in agec agef agem fmage maxage {
		generate `stub'`pw' = `stub'1^`pw'
	}
}

* Flag membership in each IGE sample: samp<model>==3 means the person was
* found in that sample's id list
local figlist "4s 4d 5f 5m"
foreach f of local figlist {
	merge m:1 idnr using `samp`f'', gen(samp`f')
}
keep if samp4s==3 | samp4d==3 | samp5f==3 | samp5m==3

* Specifications (parallel to figlist):
*   4s  sons on combined (mean) parental income
*   4d  daughters on combined (mean) parental income
*   5f  sons and daughters pooled on father's income
*   5m  sons and daughters pooled on mother's income
local incplist "pfmearn pfmearn pfearn pmearn"
local ageplist "fmage fmage agef agem"

forvalues s = 1/4 {
	global fig  : word `s' of `figlist'
	global inc  "pearn"
	global incp : word `s' of `incplist'
	global agep : word `s' of `ageplist'
	global beta b$fig /*matrix to store estimates*/
	global se se$fig /*matrix to store standard errors*/
	global var var$fig /*matrix to store variance of income measures*/

	reg $inc i.year#c.$incp i.year $agep* agec* c.agec*#c.$incp if samp${fig}==3, cluster(idnr)
	levelsof year, local(years)
	* The year-specific slopes are the first r(r) rows of e(b)'
	mat $beta=e(b)'
	mat $beta=$beta[1..r(r),....]
	* Accumulators start with a dummy row that is stripped after the loop
	mat $se=(0)
	mat $var=(0,0)
	foreach yr of local years {
		sca t1=_se[`yr'.year#c.$incp]
		mat $se=$se\t1
		* Variance of child and parent measure in this year's estimation sample
		sum $inc if year==`yr' & e(sample)==1
		sca t2=r(Var)
		sum $incp if year==`yr' & e(sample)==1
		sca t3=r(Var)
		mat $var=$var\t2,t3
	}
	mat $se=$se[2...,....]
	mat $var=$var[2...,....]
}

*Get number and list of years 
levelsof year, local(yearlist)
gl nyears: word count `yearlist'
di "Number of years=$nyears"

* Results dataset: one row per year
drop _all
set obs $nyears
gen year=.
local row = 0
foreach yr of local yearlist {
	local ++row
	replace year = `yr' in `row'
}

* Label text per model
local lab4s "son/parental average"
local lab4d "daughter/parental average"
local lab5f "pooled/father"
local lab5m "pooled/mother"

local est "4s 4d 5f 5m"
foreach m of local est {
	* Estimates, SEs and the two variance columns (svmat adds the column index)
	svmat b`m'
	svmat se`m'
	svmat var`m'
	* 95% confidence bounds
	generate sd`m'u = b`m'1+1.96*se`m'1
	generate sd`m'l = b`m'1-1.96*se`m'1
	* _rob marks the estimates obtained on the IGE samples
	rename (b`m'1 sd`m'u sd`m'l) (rob`m' sd`m'u_rob sd`m'l_rob)
}

*Adding main IRP estimates
merge 1:1 year using "${us_results}/figures_us_irp", keep(3) keepusing(irp4s irp4d irp5f irp5m sd4su sd4du sd5fu sd5mu sd4sl sd4dl sd5fl sd5ml) nogen
foreach m of local est {
	* _irp marks the bounds that came from the main IRP results file
	rename (sd`m'u sd`m'l) (sd`m'u_irp sd`m'l_irp)
}

foreach m of local est {
	label variable rob`m'     "`lab`m''"
	label variable var`m'1    "`lab`m''"
	label variable sd`m'u_rob "`lab`m''"
	label variable sd`m'l_rob "`lab`m''"
}
compress
save "${us_results}/figA3_us_irp", replace
* Draw the four panels of Appendix Figure A3 from the saved results file.
* Each panel overlays the IRP estimated on the IGE sample (rob*) and the main
* IRP (irp*), each with its 95% spikes.
use "${us_results}/figA3_us_irp", clear

* Options shared by all four panels
local common `"ytitle("IRP") xtitle("Year") graphr(fc(white) c(white)) legend(order(1 3) label(1 "IRP using IGE sample") label(3 "IRP")) ysc(r(0 0.45)) ylab(0(0.05)0.45) xsc(r(1985 2020)) xlab(1985(5)2020)"'

*Appendix figure A3a: sons on combined parental income
twoway (connect rob4s year, ms(o) lc(ebblue) mc(ebblue)) ///
	(rspike sd4su_rob sd4sl_rob year, lc(ebblue)) ///
	(connect irp4s year, ms(d) lc(blue) mc(blue) lp(solid)) ///
	(rspike sd4su_irp sd4sl_irp year, lc(blue)), ///
	`common' saving("${tabfig}/figA3a", replace)
graph export "${tabfig}/figA3a.pdf", replace

*Appendix figure A3b: daughters on combined parental income
twoway (connect rob4d year, ms(o) lc(cranberry) mc(cranberry)) ///
	(rspike sd4du_rob sd4dl_rob year, lc(cranberry)) ///
	(connect irp4d year, ms(d) lc(red) mc(red) lp(solid)) ///
	(rspike sd4du_irp sd4dl_irp year, lc(red)), ///
	`common' saving("${tabfig}/figA3b", replace)
graph export "${tabfig}/figA3b.pdf", replace

*Appendix figure A3c: sons and daughters pooled on father's income
twoway (connect rob5f year, ms(o) lc(ebblue) mc(ebblue)) ///
	(rspike sd5fu_rob sd5fl_rob year, lc(ebblue)) ///
	(connect irp5f year, ms(d) lc(blue) mc(blue)) ///
	(rspike sd5fu_irp sd5fl_irp year, lc(blue)), ///
	`common' saving("${tabfig}/figA3c", replace)
graph export "${tabfig}/figA3c.pdf", replace

*Appendix figure A3d: sons and daughters pooled on mother's income
twoway (connect rob5m year, ms(o) lc(cranberry) mc(cranberry)) ///
	(rspike sd5mu_rob sd5ml_rob year, lc(cranberry)) ///
	(connect irp5m year, ms(d) lc(red) mc(red)) ///
	(rspike sd5mu_irp sd5ml_irp year, lc(red)), ///
	`common' saving("${tabfig}/figA3d", replace)
graph export "${tabfig}/figA3d.pdf", replace

clear






********************************************************
*Figure A4: combined SRC and SEO
********************************************************
* Load the file that still carries the src flag and the survey design
* variables (iwgt, stratum, cluster)
use *newid *LAB* *cohort *LABYR* *MF* year female AGE src iwgt stratum cluster using ${projdata}/analysis-sample.dta, clear

*Impose sample restrictions
//keep if src==1 
* Child aged 25-48 and born 1952-1993
keep if inrange(AGE,25,48) & inrange(cohort,1952,1993)
* Drop rows with no matched parent, with no parental income observed,
* or with the child's own income missing
drop if (m_newid==. & f_newid==.) | (m_LAB==. & f_LAB==.) | LAB==.
keep if year>=1985
tab src, m

*Get most recent positive weight for child 
sort newid year
* Latest year in which the child has a positive, non-missing weight ...
generate year_poswt = year if iwgt>0 & iwgt<.
bysort newid: egen lastyr_poswt = max(year_poswt)
* ... and the weight of that year, copied to all of the child's rows
generate tempwt = iwgt if year==lastyr_poswt
bysort newid: egen lastposwt = max(tempwt)
label variable lastposwt "Most recent positive weight of child"
drop year_poswt lastyr_poswt tempwt
local wtvar "lastposwt"

*Set weighting 
svyset cluster [pweight=`wtvar'], strata(stratum)

/*
Weighted ranks are computed by hand here because egen does not work with
weights. The approach is correct when ranking within the regression sample,
but needs a small adjustment for parents: it only works when restricted to
observations with non-missing income for the respective parent (matched to
either a son OR a daughter). Those subsamples are therefore ranked on their
own, the ranks are written to tempfiles, and the tempfiles are merged back
into the full sample.
*/

*Rank children by gender and cohort
* Within each gender x cohort cell: running sum of weights in income order,
* rescaled to 0-100, then averaged across observations with the same income
* so that ties share one rank.
foreach incvar in LAB {
	bysort female cohort (`incvar'): gen cumwt=sum(`wtvar')
	bysort female cohort: egen totwt=max(cumwt)
	gen pctrank=(cumwt-1)/(totwt-1)*100
	bysort female cohort `incvar': egen p`incvar'=mean(pctrank)
	drop totwt cumwt pctrank
}
*Rank mothers/fathers separately, first by child gender and cohort
* For each parental income measure: temporarily restrict to rows where it is
* observed, build the weighted 0-100 rank within child gender x cohort, save
* the rank keyed on (newid, year), then restore the full data and merge the
* rank back in. Rows with missing parental income get a missing rank.
foreach incvar in f_LAB m_LAB p_MFAVG {
	preserve
	keep if `incvar'!=.	// the rank formula is only valid on non-missing parent incomes
	bysort female cohort (`incvar'): gen cumwt=sum(`wtvar')
	bysort female cohort: egen totwt=max(cumwt)
	gen pctrank=(cumwt-1)/(totwt-1)*100
	bysort female cohort `incvar': egen p`incvar'=mean(pctrank)
	drop totwt cumwt pctrank
	keep newid year p`incvar'
	tempfile rk`incvar'
	save `rk`incvar''
	restore
	merge 1:1 newid year using `rk`incvar'', nogen
}

*Now keep years 1985 onwards 
keep if year>=1985

/* Rename US vars to match Swedish varnames */
rename (newid female cohort pLAB) (idnr woman yob pearn)
rename (m_cohort f_cohort m_LABYR f_LABYR) (myob fyob mobsy fobsy)
rename (m_LAB f_LAB) (mearn fearn)
rename (pm_LAB pf_LAB pp_MFAVG) (pmearn pfearn pfmearn)
/* End US variable renames */

* Age controls. agec1 is the child's age minus 40; agef1/agem1 are the
* parent's observation year minus birth year (presumably the parent's age
* when income was measured -- confirm against the data build).
gen agec1=year-yob-40
gen agef1=fobsy-fyob
gen agem1=mobsy-myob
egen fmage1=rowmean(agef1 agem1)

* maxage1 is the father's age unless only the mother's income is observed or
* the mother's income exceeds the father's, in which case it is her age.
gen maxage1=cond((missing(fearn) & !missing(mearn)) | (mearn>fearn & !missing(mearn) & !missing(fearn)), agem1, agef1)

* Powers 2 to 4 of every age variable.
forvalues p=2/4 {
	foreach stub in agec agef agem fmage maxage {
		gen `stub'`p'=`stub'1^`p'
	}
}

* Year-specific rank-persistence estimates for Figure A4 (survey-weighted).
*
* One regression per specification, in this order:
*   2s  sons on father's income        2d  daughters on father's income
*   3s  sons on mother's income        3d  daughters on mother's income
* Each spec is "<fig id> <parent rank var> <parent age stub> <woman value>".
*
* Results are left in three matrices per spec:
*   b<fig>    year-specific slopes on the parent rank
*   se<fig>   their standard errors
*   var<fig>  per-year variances of child and parent rank in the estimation
*             sample (computed with unweighted -summarize-)
*
* The four specs previously lived in four copy-pasted blocks; the last one
* wrote -levelsof year, local(year)- and therefore looped over a stale
* `years' macro. Running every spec through the same loop body guarantees
* the year list is refreshed each time.
*
* NOTE(review): the subsample is selected with -if- under -svy:-. Stata's
* survey documentation recommends subpop() for subpopulation estimates so
* that standard errors reflect the full design; left unchanged here because
* e(sample) is used below to define the variance samples -- confirm intent.
foreach spec in "2s pfearn agef 0" "2d pfearn agef 1" "3s pmearn agem 0" "3d pmearn agem 1" {
	local figid  : word 1 of `spec'
	local prank  : word 2 of `spec'
	local pagest : word 3 of `spec'
	local girl   : word 4 of `spec'

	global fig "`figid'"
	global inc "pearn"
	global incp "`prank'"
	global agep "`pagest'"
	global beta b$fig /*matrix to store estimates*/
	global se se$fig /*matrix to store standard errors*/
	global var var$fig /*matrix to store variance of income measures*/

	svy: reg $inc i.year#c.$incp i.year $agep* agec* c.agec*#c.$incp if woman==`girl' // ,cluster(idnr)
	levelsof year, local(years)

	* The year interactions are listed first in the regression, so the first
	* r(r) rows of e(b)' (r(r) = number of distinct years) are the slopes.
	mat $beta=e(b)'
	mat $beta=$beta[1..r(r),....]

	* Seed both matrices with a placeholder row, append one row per year,
	* then drop the placeholder.
	mat $se=(0)
	mat $var=(0,0)
	foreach i of local years {
		sca t1=_se[`i'.year#c.$incp]
		mat $se=$se\t1
		sum $inc if year==`i' & e(sample)==1
		sca t2=r(Var)
		sum $incp if year==`i' & e(sample)==1
		sca t3=r(Var)
		mat $var=$var\t2,t3
	}
	mat $se=$se[2...,....]
	mat $var=$var[2...,....]
}

*Get number and list of years 
levelsof year, local(yearlist)
gl nyears: word count `yearlist'
di "Number of years=$nyears"

* Replace the micro data with one row per year and fill in the year values.
drop _all
set obs $nyears
gen year=.
forvalues row=1/$nyears {
	local yr : word `row' of `yearlist'
	replace year=`yr' in `row'
}

* Labels shared by each estimate's point estimate, variance and CI bounds.
local lbl2s "son/father"
local lbl2d "daughter/father"
local lbl3s "son/mother"
local lbl3d "daughter/mother"

* Turn the stored matrices into variables, add 95% bounds (+/- 1.96 s.e.),
* rename the point estimate to irp<fig> and attach labels.
foreach e in 2s 2d 3s 3d {
	svmat b`e'
	svmat se`e'
	svmat var`e'
	g sd`e'u=b`e'1+1.96*se`e'1
	g sd`e'l=b`e'1-1.96*se`e'1
	rename b`e'1 irp`e'
	foreach v in irp`e' var`e'1 sd`e'u sd`e'l {
		lab var `v' "`lbl`e''"
	}
}

compress
save ${us_results}/figA4_us_irp.dta, replace

* Both panels plot the sons' and daughters' estimates against year, with a
* spike between the upper and lower 95% bounds, and are saved as .gph + .pdf.

*Figure A4a: sons and daughters separately on father's income
twoway ///
	(connect irp2s year, ms(o) lc(ebblue) mc(ebblue)) ///
	(rspike sd2su sd2sl year, lc(ebblue)) ///
	(connect irp2d year, ms(t) lc(cranberry) mc(cranberry) lp(solid)) ///
	(rspike sd2du sd2dl year, lc(cranberry)), ///
	ytitle("Rank persistence") xtitle("Year") graphr(fc(white) c(white)) ///
	legend(order(1 3) label(1 "Sons") label(3 "Daughters")) ///
	ysc(r(0 0.45)) ylab(0(0.05)0.45) xsc(r(1985 2020)) xlab(1985(5)2020) ///
	saving("${tabfig}/figA4a", replace)
graph export "${tabfig}/figA4a.pdf", replace

*Figure A4b: sons and daughters separately on mother's income
twoway ///
	(connect irp3s year, ms(o) lc(ebblue) mc(ebblue)) ///
	(rspike sd3su sd3sl year, lc(ebblue)) ///
	(connect irp3d year, ms(t) lc(cranberry) mc(cranberry) lp(solid)) ///
	(rspike sd3du sd3dl year, lc(cranberry)), ///
	ytitle("Rank persistence") xtitle("Year") graphr(fc(white) c(white)) ///
	legend(order(1 3) label(1 "Sons") label(3 "Daughters")) ///
	ysc(r(0 0.45)) ylab(0(0.05)0.45) xsc(r(1985 2020)) xlab(1985(5)2020) ///
	saving("${tabfig}/figA4b", replace)
graph export "${tabfig}/figA4b.pdf", replace

* Empty memory before the next section loads its own data.
clear






********************************************************
*Figure A8: cohort groups
********************************************************
* Load the main analysis sample (1985 onwards) for the cohort-group figure.
use *newid *LAB* *cohort *LABYR* *MF* year female if year>=1985 using ${projdata}/analysis-sample-main.dta, clear

/* Rename US vars to match Swedish varnames */
rename (newid female cohort) (idnr woman yob)
rename (LAB pLAB) (earn pearn)
rename (m_cohort f_cohort m_LABYR f_LABYR) (myob fyob mobsy fobsy)
rename (m_LAB f_LAB) (mearn fearn)
rename (pm_LAB pf_LAB) (pmearn pfearn)
rename (p_MFAVG pp_MFAVG) (fmearn pfmearn)
/* End US variable renames */

*** Create  5-year cohort groups
* Groups 1-7 are consecutive 5-year birth windows starting in 1952; group 8
* is the longer 1987-1993 window. Birth years outside 1952-1993 stay missing.
gen cohort=.
local lo=1952
forvalues g=1/7 {
	replace cohort=`g' if inrange(yob,`lo',`lo'+4)
	local lo=`lo'+5
}
replace cohort=8 if inrange(yob,1987,1993)

* (Re)define the value label that is attached to the cohort variable later.
cap noi label drop cohortgroupvals
label define cohortgroupvals 1 "1952-56" 2 "1957-61" 3 "1962-66" 4 "1967-71" 5 "1972-76" 6 "1977-81" 7 "1982-86" 8 "1987-93"

* Age controls. agec1 is the child's age minus 40; agef1/agem1 are the
* parent's observation year minus birth year (presumably the parent's age
* when income was measured -- confirm against the data build).
gen agec1=year-yob-40
gen agef1=fobsy-fyob
gen agem1=mobsy-myob
egen fmage1=rowmean(agef1 agem1)

* maxage1 is the father's age unless only the mother's income is observed or
* the mother's income exceeds the father's, in which case it is her age.
gen maxage1=cond((missing(fearn) & !missing(mearn)) | (mearn>fearn & !missing(mearn) & !missing(fearn)), agem1, agef1)

* Powers 2 to 4 of every age variable.
forvalues p=2/4 {
	foreach stub in agec agef agem fmage maxage {
		gen `stub'`p'=`stub'1^`p'
	}
}
* Cohort-group-specific rank-persistence estimates for Figure A8.
*
* Specs run in this order ("<fig id> <parent rank var> <parent age stub>
* [<sample condition>]"; no condition means sons and daughters pooled):
*   Figure A8c: sons (2s) and daughters (2d) on father's income
*   Figure A8d: sons (3s) and daughters (3d) on mother's income
*   Figure A8a: sons (4s) and daughters (4d) on combined (mean) parental income
*   Figure A8b: pooled children on father's (5f) and mother's (5m) income
*
* Each spec leaves three matrices: b<fig> (cohort-specific slopes), se<fig>
* (their standard errors, clustered on idnr) and var<fig> (per-cohort
* variances of child and parent rank in the estimation sample).
foreach spec in ///
	"2s pfearn agef woman==0"  "2d pfearn agef woman==1"  ///
	"3s pmearn agem woman==0"  "3d pmearn agem woman==1"  ///
	"4s pfmearn fmage woman==0" "4d pfmearn fmage woman==1" ///
	"5f pfearn agef" "5m pmearn agem" {

	local figid  : word 1 of `spec'
	local prank  : word 2 of `spec'
	local pagest : word 3 of `spec'
	local cond   : word 4 of `spec'

	* Build the optional -if- clause (empty for the pooled specs).
	local subset
	if "`cond'"!="" local subset "if `cond'"

	global fig "`figid'"
	global inc "pearn"
	global incp "`prank'"
	global agep "`pagest'"
	global beta b$fig /*matrix to store estimates*/
	global se se$fig /*matrix to store standard errors*/
	global var var$fig /*matrix to store variance of income measures*/

	reg $inc i.cohort#c.$incp i.cohort $agep* agec* c.agec*#c.$incp `subset', cluster(idnr)
	levelsof cohort, local(cohorts)

	* The cohort interactions are listed first in the regression, so the
	* first r(r) rows of e(b)' (r(r) = number of cohort groups) are the slopes.
	mat $beta=e(b)'
	mat $beta=$beta[1..r(r),....]

	* Seed with a placeholder row, append one row per cohort group, then
	* drop the placeholder.
	mat $se=(0)
	mat $var=(0,0)
	foreach g of local cohorts {
		sca t1=_se[`g'.cohort#c.$incp]
		mat $se=$se\t1
		sum $inc if cohort==`g' & e(sample)
		sca t2=r(Var)
		sum $incp if cohort==`g' & e(sample)
		sca t3=r(Var)
		mat $var=$var\t2,t3
	}
	mat $se=$se[2...,....]
	mat $var=$var[2...,....]
}

*Get number and list of cohorts
levelsof cohort, local(cohortlist)
gl ncohorts: word count `cohortlist'
di "Number of cohorts=$ncohorts"

* Replace the micro data with one row per cohort group.
drop _all
set obs $ncohorts
gen cohort=.
forvalues row=1/$ncohorts {
	local grp : word `row' of `cohortlist'
	replace cohort=`grp' in `row'
}

* Labels shared by each estimate's point estimate and CI bounds.
local lbl2s "son/father"
local lbl2d "daughter/father"
local lbl3s "son/mother"
local lbl3d "daughter/mother"
local lbl4s "son/parental average"
local lbl4d "daughter/parental average"
local lbl5f "pooled/father"
local lbl5m "pooled/mother"

* Turn the stored matrices into variables, add 95% bounds (+/- 1.96 s.e.),
* rename the point estimate to irp<fig> and attach labels.
foreach e in 2s 2d 3s 3d 4s 4d 5f 5m {
	svmat b`e'
	svmat se`e'
	svmat var`e'
	g sd`e'u=b`e'1+1.96*se`e'1
	g sd`e'l=b`e'1-1.96*se`e'1
	rename b`e'1 irp`e'
	foreach v in irp`e' sd`e'u sd`e'l {
		lab var `v' "`lbl`e''"
	}
}
compress

*Add value labels to cohort groups
label values cohort cohortgroupvals
tab cohort
save ${us_results}/figA8_us_irp.dta, replace

* All four panels plot estimates against the cohort-group index, label the
* x axis with the cohort value labels at a 45-degree angle, and draw spikes
* between the upper and lower 95% bounds. No x-axis range is imposed.

*Figure A8c: sons and daughters separately on father's income
twoway ///
	(connect irp2s cohort, ms(o) lc(ebblue) mc(ebblue)) ///
	(rspike sd2su sd2sl cohort, lc(ebblue)) ///
	(connect irp2d cohort, ms(t) lc(cranberry) mc(cranberry) lp(solid)) ///
	(rspike sd2du sd2dl cohort, lc(cranberry)), ///
	ytitle("Rank persistence") xtitle("Cohorts") graphr(fc(white) c(white)) ///
	legend(order(1 3) label(1 "Sons") label(3 "Daughters")) ///
	ysc(r(0 0.45)) ylab(0(0.05)0.45) ///
	xlab(`cohortlist', valuelabel angle(45)) ///
	saving("${tabfig}/figA8c", replace)
graph export "${tabfig}/figA8c.pdf", replace

*Figure A8d: sons and daughters separately on mother's income
twoway ///
	(connect irp3s cohort, ms(o) lc(ebblue) mc(ebblue)) ///
	(rspike sd3su sd3sl cohort, lc(ebblue)) ///
	(connect irp3d cohort, ms(t) lc(cranberry) mc(cranberry) lp(solid)) ///
	(rspike sd3du sd3dl cohort, lc(cranberry)), ///
	ytitle("Rank persistence") xtitle("Cohorts") graphr(fc(white) c(white)) ///
	legend(order(1 3) label(1 "Sons") label(3 "Daughters")) ///
	ysc(r(0 0.45)) ylab(0(0.05)0.45) ///
	xlab(`cohortlist', valuelabel angle(45)) ///
	saving("${tabfig}/figA8d", replace)
graph export "${tabfig}/figA8d.pdf", replace

*Figure A8a: sons and daughters separately on combined (mean) parental income
twoway ///
	(connect irp4s cohort, ms(o) lc(ebblue) mc(ebblue)) ///
	(rspike sd4su sd4sl cohort, lc(ebblue)) ///
	(connect irp4d cohort, ms(t) lc(cranberry) mc(cranberry) lp(solid)) ///
	(rspike sd4du sd4dl cohort, lc(cranberry)), ///
	ytitle("Rank persistence") xtitle("Cohorts") graphr(fc(white) c(white)) ///
	legend(order(1 3) label(1 "Sons") label(3 "Daughters")) ///
	ysc(r(0 0.45)) ylab(0(0.05)0.45) ///
	xlab(`cohortlist', valuelabel angle(45)) ///
	saving("${tabfig}/figA8a", replace)
graph export "${tabfig}/figA8a.pdf", replace

*Figure A8b: sons and daughters pooled on father's and mother's income
twoway ///
	(connect irp5f cohort, ms(o) lc(ebblue) mc(ebblue)) ///
	(rspike sd5fu sd5fl cohort, lc(ebblue)) ///
	(connect irp5m cohort, ms(t) lc(cranberry) mc(cranberry) lp(solid)) ///
	(rspike sd5mu sd5ml cohort, lc(cranberry)), ///
	ytitle("Rank persistence") xtitle("Cohorts") graphr(fc(white) c(white)) ///
	legend(order(1 3) label(1 "Fathers") label(3 "Mothers")) ///
	ysc(r(0 0.45)) ylab(0(0.05)0.45) ///
	xlab(`cohortlist', valuelabel angle(45)) ///
	saving("${tabfig}/figA8b", replace)
graph export "${tabfig}/figA8b.pdf", replace

* Empty memory before the next section loads its own data.
clear









********************************************************
*Figure A9(b): IGE trends
********************************************************

* Load the main analysis sample (1985 onwards) with the "c"-suffixed income
* variables used for the IGE regression.
use *newid *LABc* *cohort *LABYR* *MF* year female if year>=1985 using ${projdata}/analysis-sample-main.dta, clear

/* Rename US vars to match Swedish varnames */
rename (female newid cohort LOGLABc) (woman idnr yob learn_c)
rename (m_cohort f_cohort m_LABYRc f_LABYRc) (myob fyob mobsy_c fobsy_c)
rename (m_LABc f_LABc) (mearn_c fearn_c)
rename (f_LOGLABc m_LOGLABc p_LOGMFAVGc) (lfearn_c lmearn_c lfmearn_c)
/* End US variable renames */

* Age controls. agec1 is the child's age minus 40; agef1/agem1 are the
* parent's observation year minus birth year (presumably the parent's age
* when income was measured -- confirm against the data build).
gen agec1=year-yob-40
gen agef1=fobsy_c-fyob
gen agem1=mobsy_c-myob
egen fmage1=rowmean(agef1 agem1)

* maxage1 is the father's age unless only the mother's income is observed or
* the mother's income exceeds the father's, in which case it is her age.
gen maxage1=cond((missing(fearn_c) & !missing(mearn_c)) | (mearn_c>fearn_c & !missing(mearn_c) & !missing(fearn_c)), agem1, agef1)

* Powers 2 to 4 of every age variable.
forvalues p=2/4 {
	foreach stub in agec agef agem fmage maxage {
		gen `stub'`p'=`stub'1^`p'
	}
}

*1: sons and daughters pooled on combined (mean) parental income
* Regress child log income on year-specific slopes of log mean parental
* income, with year effects, parent and child age polynomials and child-age
* interactions; standard errors are clustered on idnr.
global fig "1"
global inc "learn_c"
global incp "lfmearn_c"
global agep "fmage"
global beta b$fig /*matrix to store estimates*/
global se se$fig /*matrix to store standard errors*/
global var var$fig /*matrix to store variance of income measures*/
regress $inc i.year#c.$incp i.year $agep* agec* c.agec*#c.$incp, vce(cluster idnr)
levelsof year, local(years)

* The year interactions are listed first in the regression, so the first
* r(r) rows of e(b)' (r(r) = number of distinct years) are the yearly slopes.
matrix $beta=e(b)'
matrix $beta=$beta[1..r(r),....]

* Seed with a placeholder row, append one row per year (s.e. of the slope;
* variances of child and parent income in the estimation sample), then drop
* the placeholder.
matrix $se=J(1,1,0)
matrix $var=J(1,2,0)
foreach yr of local years {
	scalar t1=_se[`yr'.year#c.$incp]
	matrix $se=$se\t1
	summarize $inc if year==`yr' & e(sample)
	scalar t2=r(Var)
	summarize $incp if year==`yr' & e(sample)
	scalar t3=r(Var)
	matrix $var=$var\t2,t3
}
matrix $se=$se[2...,....]
matrix $var=$var[2...,....]


*Get number and list of years 
levelsof year, local(yearlist)
gl nyears: word count `yearlist'
di "Number of years=$nyears"

* Replace the micro data with one row per year and fill in the year values.
drop _all
set obs $nyears
gen year=.
forvalues row=1/$nyears {
	local yr : word `row' of `yearlist'
	replace year=`yr' in `row'
}

* Turn the stored matrices into variables, add 95% bounds (+/- 1.96 s.e.),
* rename the point estimate to ige1 and attach labels.
foreach e in 1 {
	svmat b`e'
	svmat se`e'
	svmat var`e'
	g sd`e'u=b`e'1+1.96*se`e'1
	g sd`e'l=b`e'1-1.96*se`e'1
}
rename b11 ige1 

foreach v in ige1 var11 sd1u sd1l {
	lab var `v' "pooled/parental average"
}

compress
save ${us_results}/figA9_us_ige.dta, replace


*Figure: sons and daughters pooled on combined (mean) parental income
* Yearly elasticity estimates with a spike between the 95% bounds; saved as
* .gph and exported as .pdf.
twoway ///
	(connect ige1 year, ms(o) lc(ebblue) mc(ebblue)) ///
	(rspike sd1u sd1l year, lc(ebblue)), ///
	ytitle("Elasticity") xtitle("Year") graphr(fc(white) c(white)) ///
	legend(order(1) label(1 "Pooled sons and daughters")) ///
	ysc(r(0 0.45)) ylab(0(0.05)0.45) xsc(r(1985 2020)) xlab(1985(5)2020) ///
	saving("${tabfig}/figA9b", replace)
graph export "${tabfig}/figA9b.pdf", replace

